### INIT CODE ###
# # # # # Section 01 - Libraries ---------------------------------------------
library("plotly")
library("htmlwidgets")
library("knitr")
library("agricolae") # Tukey test
library("dplyr") # Developing with %>%
# Import files from xlsx
library("plotly") # Advanced graphical functionsRscience - Parameterized Report
Work environment
- Operating System: Linux - 6.8.0-85-generic
- R Version: 4.4.3
- Rscience Version: Paquete ‘stats’: 4.4.3
- Core Statistical Tool: General Linnear Model, Fixed Effect, anova 1 way.
- Core Script: Script 001
- UTC Time: 2025-10-21 20:25:04 UTC
- System/Computer Time: 2025-10-21 22:25:04 CEST
- Inferred Timezone Location: Europe/Rome
(R can only reliably determine the timezone, not the physical city/country.)
Cita
Data Analysis
my_dataset <- mtcars
var_name_rv <- "mpg"
var_name_factor <- "cyl"
alpha_value <- 0.05
# # # # # Section 02 - Import dataset ----------------------------------------
#---my_dataset <- _A_my_import_sentence_A_
head(x = my_dataset, n = 5) mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
# # # # # Section 03 - Settings ----------------------------------------------
#---var_name_rv <- "_B_var_name_rv_B_"
#---var_name_factor <- "_B_var_name_factor_B_"
#---alpha_value <- _B_alpha_value_B_
# # # # # Section 04 - Standard actions --------------------------------------
# The factor must be factor data type on R.
my_dataset[,var_name_factor] <- as.factor(as.character(my_dataset[,var_name_factor]))
# # # # # Section 05 - Alpha and confidence value ----------------------------
confidence_value <- 1 - alpha_value
df_alpha_confidence <- data.frame(
"order" = 1:2,
"detail" = c("alpha value", "confidence value"),
"probability" = c(alpha_value, confidence_value),
"percentaje" = paste0(c(alpha_value, confidence_value)*100, "%")
)
df_alpha_confidence order detail probability percentaje
1 1 alpha value 0.05 5%
2 2 confidence value 0.95 95%
# # # # # Section 06 - Selected variables and roles -------------------------
vector_all_var_names <- colnames(my_dataset)
vector_name_selected_vars <- c(var_name_rv, var_name_factor)
vector_rol_vars <- c("VR", "FACTOR")
df_selected_vars <- data.frame(
"order" = 1:length(vector_name_selected_vars),
"var_name" = vector_name_selected_vars,
"var_number" = match(vector_name_selected_vars, vector_all_var_names),
"var_letter" = openxlsx::int2col(match(vector_name_selected_vars, vector_all_var_names)),
"var_role" = vector_rol_vars,
"doble_reference" = paste0(vector_rol_vars, "(", vector_name_selected_vars, ")")
)
df_selected_vars order var_name var_number var_letter var_role doble_reference
1 1 mpg 1 A VR VR(mpg)
2 2 cyl 2 B FACTOR FACTOR(cyl)
# # # # # Section 05 - minibase ------------------------------------------------
# Only selected variabless.
# Only completed rows.
# Factor columns as factor object in R.
minibase <- na.omit(my_dataset[vector_name_selected_vars])
colnames(minibase) <- vector_rol_vars
minibase[,"FACTOR"] <- as.factor(minibase[,"FACTOR"])
head(x = minibase, n = 5) VR FACTOR
Mazda RX4 21.0 6
Mazda RX4 Wag 21.0 6
Datsun 710 22.8 4
Hornet 4 Drive 21.4 6
Hornet Sportabout 18.7 8
# # # Anova control
# 'VR' must be numeric and 'FACTOR must be factor.
df_control_minibase <- data.frame(
"order" = 1:nrow(df_selected_vars),
"var_name" = df_selected_vars$"var_name",
"var_role" = df_selected_vars$"var_role",
"control" = c("is.numeric()", "is.factor()"),
"verify" = c(is.numeric(minibase[,"VR"]), is.factor(minibase[,"FACTOR"]))
)
df_control_minibase order var_name var_role control verify
1 1 mpg VR is.numeric() TRUE
2 2 cyl FACTOR is.factor() TRUE
# # # my_dataset and minibase reps
# Our 'n' is from minibase
df_show_n <- data.frame(
"object" = c("my_dataset", "minibase"),
"n_col" = c(ncol(my_dataset), ncol(minibase)),
"n_row" = c(nrow(my_dataset), nrow(minibase))
)
df_show_n object n_col n_row
1 my_dataset 11 32
2 minibase 2 32
# # # Factor info
# Default order for levels its alphabetic order.
df_factor_info <- data.frame(
"order" = 1:nlevels(minibase[,2]),
"level" = levels(minibase[,2]),
"n" = as.vector(table(minibase[,2])),
"mean" = tapply(minibase[,1], minibase[,2], mean),
"color" = rainbow(nlevels(minibase[,2]))
)
rownames(df_factor_info) <- 1:nrow(df_factor_info)
df_factor_info order level n mean color
1 1 4 11 26.66364 #FF0000
2 2 6 7 19.74286 #00FF00
3 3 8 14 15.10000 #0000FF
# # # Unbalanced reps for levels?
# Important information for Tukey.
# If reps its equal or not equal in all levels must be detailled
# on Tukey.
check_unbalanced_reps <- length(unique(df_factor_info$n)) > 1
check_unbalanced_reps[1] TRUE
phrase_yes_unbalanced <- "The design is unbalanced in repetitions. A correction is applied to the Tukey test."
phrase_no_unbalanced <- "The design is unbalanced in replicates. A correction should be applied to the Tukey test."
phrase_selected_check_unbalanced <- ifelse(test = check_unbalanced_reps,
yes = phrase_yes_unbalanced,
no = phrase_no_unbalanced)
phrase_selected_check_unbalanced[1] "The design is unbalanced in repetitions. A correction is applied to the Tukey test."
# # # # # Section 06 - Anova Test ----------------------------------------------
# # # Anova test
lm_anova <- lm(VR ~ FACTOR, data = minibase) # Linear model
aov_anova <- aov(lm_anova) # R results for anova
df_table_anova <- as.data.frame(summary(aov_anova)[[1]]) # Common anova table
df_table_anova Df Sum Sq Mean Sq F value Pr(>F)
FACTOR 2 824.7846 412.39230 39.69752 4.978919e-09
Residuals 29 301.2626 10.38837 NA NA
# # # Standard error from model for each level
model_error_var_MSE <- df_table_anova$`Mean Sq`[2]
model_error_sd <- sqrt(model_error_var_MSE)
df_model_error <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"model_error_var_MSE" = model_error_var_MSE,
"model_error_sd" = model_error_sd
)
df_model_error["model_error_se"] <- df_model_error["model_error_sd"]/sqrt(df_model_error$n)
df_model_error order level n model_error_var_MSE model_error_sd model_error_se
1 1 4 11 10.38837 3.223099 0.9718008
2 2 6 7 10.38837 3.223099 1.2182168
3 3 8 14 10.38837 3.223099 0.8614094
# # # # # Section 07 - minibase_mod --------------------------------------------
# # # Detect rows on my_dataset there are on minibase
dt_rows_my_dataset_ok <- rowSums(!is.na(my_dataset[vector_name_selected_vars])) == ncol(minibase)
# # # Object minibase_mod and new cols
minibase_mod <- minibase
minibase_mod$"lvl_order_number" <- as.numeric(minibase_mod[,2])
minibase_mod$"lvl_color" <- df_factor_info$color[minibase_mod$"lvl_order_number"]
minibase_mod$"fitted.values" <- df_factor_info$"mean"[minibase_mod$"lvl_order_number"]
minibase_mod$"residuals" <- lm_anova$residuals
minibase_mod$"id_my_dataset" <- c(1:nrow(my_dataset))[dt_rows_my_dataset_ok]
minibase_mod$"id_minibase" <- 1:nrow(minibase)
minibase_mod$"studres" <- minibase_mod$"residuals"/model_error_sd
# # # # # Section 08 - Requeriments for residuals-------------------------------
# # # Normality test (Shapiro-Wilk)
test_residuals_normality <- shapiro.test(minibase_mod$residuals)
test_residuals_normality
Shapiro-Wilk normality test
data: minibase_mod$residuals
W = 0.97065, p-value = 0.5177
# # # Homogeinidy test (Bartlett)
test_residuals_homogeneity <- bartlett.test(residuals ~ FACTOR, data = minibase_mod)
test_residuals_homogeneity
Bartlett test of homogeneity of variances
data: residuals by FACTOR
Bartlett's K-squared = 8.3934, df = 2, p-value = 0.01505
# # # Residuals variance from levels from original residuals
df_raw_error <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"n" = tapply(minibase_mod$residuals, minibase_mod[,2], length),
"raw_error_var" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
"raw_error_sd" = tapply(minibase_mod$residuals, minibase_mod[,2], sd)
)
df_model_error["raw_error_se"] <- df_model_error["model_error_sd"]/sqrt(df_model_error$"n")
rownames(df_raw_error) <- 1:nrow(df_raw_error)
df_raw_error order level n raw_error_var raw_error_sd
1 1 4 11 20.338545 4.509828
2 2 6 7 2.112857 1.453567
3 3 8 14 6.553846 2.560048
phrase_info_errors <- c("Anova and Tukey use MSE from model.",
"Bartlett use variance from raw error on each level.",
"Only if there is homogeneity from raw error variances then is a good idea take desition from MSE.")
phrase_info_errors[1] "Anova and Tukey use MSE from model."
[2] "Bartlett use variance from raw error on each level."
[3] "Only if there is homogeneity from raw error variances then is a good idea take desition from MSE."
# # # Sum for residuals
sum_residuals <- sum(minibase_mod$residuals)
sum_residuals[1] -3.330669e-16
# # # Mean for residuals
mean_residuals <- mean(minibase_mod$residuals)
mean_residuals[1] -1.040834e-17
##################################
p_value_shapiro <- test_residuals_normality$p.value
p_value_bartlett <- test_residuals_homogeneity$p.value
p_value_anova <- df_table_anova$"Pr(>F)"[1]
vector_p_value <- c(p_value_shapiro, p_value_bartlett, p_value_anova)
vector_logic_rejected <- vector_p_value < alpha_value
vector_ho_decision <- ifelse(test = vector_logic_rejected, yes = "Ho Rejected", "Ho no rejected")
vector_ho_rejected <- ifelse(test = vector_logic_rejected, yes = "Yes", "No")
df_summary_anova <- data.frame(
"test" = c("Shapiro-Wilk test", "Bartlett test", "Anova 1 way"),
"aim" = c("Normality", "Homogeneity", "Mean"),
"variable" = c("residuals", "residuals", var_name_rv),
"p_value" = vector_p_value,
"alpha_value" = c(alpha_value, alpha_value, alpha_value),
"Decision" = vector_ho_decision
)
df_summary_anova test aim variable p_value alpha_value
1 Shapiro-Wilk test Normality residuals 5.176650e-01 0.05
2 Bartlett test Homogeneity residuals 1.504518e-02 0.05
3 Anova 1 way Mean mpg 4.978919e-09 0.05
Decision
1 Ho no rejected
2 Ho Rejected
3 Ho Rejected
check_shapiro_rejected <- p_value_shapiro < alpha_value
phrase_shapiro_yes_rejected <- "The null hypothesis of normal distribution of residuals is rejected."
phrase_shapiro_no_rejected <- "The null hypothesis of normal distribution of residuals is not rejected."
phrase_shapiro_selected <- ifelse(test = check_shapiro_rejected,
yes = phrase_shapiro_yes_rejected,
no = phrase_shapiro_no_rejected)
phrase_shapiro_selected [1] "The null hypothesis of normal distribution of residuals is not rejected."
check_bartlett_rejected <- p_value_bartlett < alpha_value
phrase_bartlett_yes_rejected <- "The hypothesis of homogeneity of variances (homoscedasticity) is rejected."
phrase_bartlett_no_rejected <- "The hypothesis of homogeneity of variances (homoscedasticity) is not rejected."
phrase_bartlett_selected <- ifelse(test = check_bartlett_rejected,
yes = phrase_bartlett_yes_rejected,
no = phrase_bartlett_no_rejected)
phrase_bartlett_selected[1] "The hypothesis of homogeneity of variances (homoscedasticity) is rejected."
check_ok_all_requeriments <- sum(vector_logic_rejected[c(1,2)]) == 2
phrase_requeriments_yes_valid <- "All residual assumptions are met, so it is valid to draw conclusions from the ANOVA test."
phrase_requeriments_no_valid <- "Not all model assumptions are met, so it is NOT valid to draw conclusions from the ANOVA test."
phrase_requeriments_selected <- ifelse(test = check_ok_all_requeriments,
yes = phrase_requeriments_yes_valid,
no = phrase_requeriments_no_valid)
phrase_requeriments_selected [1] "Not all model assumptions are met, so it is NOT valid to draw conclusions from the ANOVA test."
check_anova_rejected <- p_value_anova < alpha_value
phrase_anova_yes_rejected <- "The null hypothesis of the ANOVA test is rejected. There are statistically significant differences in at least one level of the factor."
phrase_anova_no_rejected <- "The null hypothesis of the ANOVA test is not rejected. All levels of the factor are statistically equal."
phrase_anova_selected <- ifelse(test = check_anova_rejected,
yes = phrase_anova_yes_rejected,
no = phrase_anova_no_rejected)
phrase_anova_selected <- ifelse(
test = check_ok_all_requeriments,
yes = phrase_anova_selected,
no = "Regardless of the p-value obtained in ANOVA, it is not valid to draw conclusions."
)
phrase_anova_selected[1] "Regardless of the p-value obtained in ANOVA, it is not valid to draw conclusions."
##############################################################################
tukey01_full_groups <- agricolae::HSD.test(y = lm_anova,
trt = colnames(minibase_mod)[2],
alpha = alpha_value,
group = TRUE,
console = FALSE,
unbalanced = check_unbalanced_reps)
# # # Tukey test - Tukey pairs comparation - Full version
tukey02_full_pairs <- agricolae::HSD.test(y = lm_anova,
trt = colnames(minibase_mod)[2],
alpha = alpha_value,
group = FALSE,
console = FALSE,
unbalanced = check_unbalanced_reps)
# # Original table from R about Tukey
df_tukey_original_table <- tukey01_full_groups$groups
df_tukey_original_table VR groups
4 26.66364 a
6 19.74286 b
8 15.10000 c
# # # New table about Tukey
df_tukey_table <- data.frame(
"level" = rownames(tukey01_full_groups$groups),
"mean" = tukey01_full_groups$groups[,1],
"group" = tukey01_full_groups$groups[,2]
)
df_tukey_table level mean group
1 4 26.66364 a
2 6 19.74286 b
3 8 15.10000 c
# # # # # Section 10 - Partitioned Measures (VR)--------------------------------
# # # Partitioned Measures of Position (VR)
df_vr_position_levels <- data.frame(
"order" = 1:nlevels(minibase[,2]),
"level" = levels(minibase[,2]),
"min" = tapply(minibase[,1], minibase[,2], min),
"mean" = tapply(minibase[,1], minibase[,2], mean),
"Q1" = tapply(minibase[,1], minibase[,2], quantile, 0.25),
"median" = tapply(minibase[,1], minibase[,2], median),
"Q3" = tapply(minibase[,1], minibase[,2], quantile, 0.75),
"max" = tapply(minibase[,1], minibase[,2], max),
"n" = tapply(minibase[,1], minibase[,2], length)
)
# # # Partitioned Measures of Dispersion (VR)
df_vr_dispersion_levels <- data.frame(
"order" = 1:nlevels(minibase[,2]),
"level" = levels(minibase[,2]),
"range" = tapply(minibase[,1], minibase[,2], function(x){max(x) - min(x)}),
"variance" = tapply(minibase[,1], minibase[,2], var),
"standard_deviation" = tapply(minibase[,1], minibase[,2], sd),
"standard_error" = tapply(minibase[,1], minibase[,2], function(x){sd(x)/sqrt(length(x))}),
"n" = tapply(minibase[,1], minibase[,2], length)
)
df_vr_dispersion_levels order level range variance standard_deviation standard_error n
4 1 4 12.5 20.338545 4.509828 1.3597642 11
6 2 6 3.6 2.112857 1.453567 0.5493967 7
8 3 8 8.8 6.553846 2.560048 0.6842016 14
# # # General Measures of Position (VR)
df_vr_position_general <- data.frame(
"min" = min(minibase[,1]),
"mean" = mean(minibase[,1]),
"median" = median(minibase[,1]),
"max" = max(minibase[,1]),
"n" = length(minibase[,1])
)
df_vr_position_general min mean median max n
1 10.4 20.09062 19.2 33.9 32
# # # General Measures of Dispersion (VR)
df_vr_dispersion_general <- data.frame(
"range" = max(minibase[,1]) - min(minibase[,1]),
"variance" = var(minibase[,1]),
"standard_deviation" = sd(minibase[,1]),
"standard_error" = sd(minibase[,1])/(sqrt(length(minibase[,1]))),
"n" = length(minibase[,1])
)
df_vr_dispersion_general range variance standard_deviation standard_error n
1 23.5 36.3241 6.026948 1.065424 32
# # # # # Section 11 - Partitioned Measures (Residuals)-------------------------
# # # Partitioned Measures of Position (residuals)
df_residuals_position_levels <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"min" = tapply(minibase_mod$residuals, minibase_mod[,2], min),
"mean" = tapply(minibase_mod$residuals, minibase_mod[,2], mean),
"median" = tapply(minibase_mod$residuals, minibase_mod[,2], median),
"max" = tapply(minibase_mod$residuals, minibase_mod[,2], max),
"n" = tapply(minibase_mod$residuals, minibase_mod[,2], length)
)
df_residuals_position_levels order level min mean median max n
4 1 4 -5.263636 -4.037175e-17 -0.66363636 7.236364 11
6 2 6 -1.942857 -7.943233e-18 -0.04285714 1.657143 7
8 3 8 -4.700000 1.193252e-17 0.10000000 4.100000 14
# # # Partitioned Measures of Dispersion (residuals)
df_residual_dispersion_levels <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"range" = tapply(minibase_mod$residuals, minibase_mod[,2], function(x){max(x) - min(x)}),
"variance" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
"standard_deviation" = tapply(minibase_mod$residuals, minibase_mod[,2], sd),
"standard_error" = tapply(minibase_mod$residuals, minibase_mod[,2], function(x){sd(x)/sqrt(length(x))}),
"n" = tapply(minibase_mod$residuals, minibase_mod[,2], length)
)
df_residual_dispersion_levels order level range variance standard_deviation standard_error n
4 1 4 12.5 20.338545 4.509828 1.3597642 11
6 2 6 3.6 2.112857 1.453567 0.5493967 7
8 3 8 8.8 6.553846 2.560048 0.6842016 14
# # # General Measures of Position (residuals)
df_residuals_position_general <- data.frame(
"min" = min(minibase_mod$residuals),
"mean" = mean(minibase_mod$residuals),
"median" = median(minibase_mod$residuals),
"max" = max(minibase_mod$residuals),
"n" = length(minibase_mod$residuals)
)
df_residuals_position_general min mean median max n
1 -5.263636 -1.040834e-17 0.02857143 7.236364 32
# # # General Measures of Dispersion (residuals)
df_residuals_dispersion_general <- data.frame(
"range" = max(minibase_mod$residuals) - min(minibase_mod$residuals),
"variance" = var(minibase_mod$residuals),
"standard_deviation" = sd(minibase_mod$residuals),
"standard_error" = sd(minibase_mod$residuals)/(sqrt(length(minibase_mod$residuals))),
"n" = length(minibase_mod$residuals)
)
df_residuals_dispersion_general range variance standard_deviation standard_error n
1 12.5 9.718148 3.117394 0.5510827 32
# # # # # Section 12 - Model estimators ----------------------------------------
# # # Means for each level
vector_est_mu_i <- df_vr_position_levels$mean
vector_est_mu_i[1] 26.66364 19.74286 15.10000
# # # Mean of means
est_mu <- mean(vector_est_mu_i)
vector_est_mu <- rep(est_mu, length(vector_est_mu_i))
vector_est_mu[1] 20.50216 20.50216 20.50216
# # # Tau efects
vector_est_tau_i <- vector_est_mu_i - vector_est_mu
vector_est_tau_i[1] 6.1614719 -0.7593074 -5.4021645
# # # Sum of tau efects
sum_est_tau_i <- sum(vector_est_tau_i)
sum_est_tau_i[1] -5.329071e-15
# # # Long model information on dataframe
df_anova1way_model_long <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"est_mu" = vector_est_mu,
"est_tau_i" = vector_est_tau_i
)
df_anova1way_model_long order level n est_mu est_tau_i
1 1 4 11 20.50216 6.1614719
2 2 6 7 20.50216 -0.7593074
3 3 8 14 20.50216 -5.4021645
# # # Short model information on dataframe
df_anova1way_model_short <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"est_mu_i" = vector_est_mu_i
)
df_anova1way_model_short order level n est_mu_i
1 1 4 11 26.66364
2 2 6 7 19.74286
3 3 8 14 15.10000
# # # # # Section 13 - Special table to plots ----------------------------------
# # # Table for plot001
df_table_factor_plot001 <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"mean" = tapply(minibase[,1], minibase[,2], mean),
"min" = tapply(minibase[,1], minibase[,2], min),
"max" = tapply(minibase[,1], minibase[,2], max),
"sd" = tapply(minibase[,1], minibase[,2], sd),
"var" = tapply(minibase[,1], minibase[,2], var)
)
df_table_factor_plot002 <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"mean" = tapply(minibase[,1], minibase[,2], mean),
"model_error_sd" = df_model_error$model_error_sd
)
df_table_factor_plot002["lower_limit"] <- df_table_factor_plot002$mean - df_table_factor_plot002$model_error_sd
df_table_factor_plot002["upper_limmit"] <- df_table_factor_plot002$mean + df_table_factor_plot002$model_error_sd
df_table_factor_plot002["color"] <- df_factor_info$color
df_table_factor_plot002 order level n mean model_error_sd lower_limit upper_limmit color
4 1 4 11 26.66364 3.223099 23.44054 29.88674 #FF0000
6 2 6 7 19.74286 3.223099 16.51976 22.96596 #00FF00
8 3 8 14 15.10000 3.223099 11.87690 18.32310 #0000FF
df_table_factor_plot003 <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"mean" = tapply(minibase[,1], minibase[,2], mean),
"model_error_se" = df_model_error$model_error_se
)
df_table_factor_plot003["lower_limit"] <- df_table_factor_plot003$mean - df_table_factor_plot003$model_error_se
df_table_factor_plot003["upper_limmit"] <- df_table_factor_plot003$mean + df_table_factor_plot003$model_error_se
df_table_factor_plot003["color"] <- df_factor_info$color
df_table_factor_plot003 order level n mean model_error_se lower_limit upper_limmit color
4 1 4 11 26.66364 0.9718008 25.69184 27.63544 #FF0000
6 2 6 7 19.74286 1.2182168 18.52464 20.96107 #00FF00
8 3 8 14 15.10000 0.8614094 14.23859 15.96141 #0000FF
# # # Table for plot004
df_table_factor_plot004 <- df_vr_position_levels
df_table_factor_plot004["color"] <- df_factor_info$color
# # # Table for plot005
df_table_factor_plot005 <- df_table_factor_plot004
# # # Table for plot006
df_table_factor_plot006 <- df_table_factor_plot004
df_table_factor_plot007 <- df_table_factor_plot003
correct_pos_letters <- order(df_tukey_table$level)
vector_letters <- df_tukey_table$group[correct_pos_letters]
df_table_factor_plot007["group"] <- vector_letters
# # # Table for plot006
df_table_residuals_plot001 <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"n" = tapply(minibase_mod$residuals, minibase_mod[,2], length),
"min" = tapply(minibase_mod$residuals, minibase_mod[,2], min),
"mean" = tapply(minibase_mod$residuals, minibase_mod[,2], mean),
"max" = tapply(minibase_mod$residuals, minibase_mod[,2], max),
"var" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
"sd" = tapply(minibase_mod$residuals, minibase_mod[,2], sd),
"color" = df_factor_info$color
)
df_table_residuals_plot001 order level n min mean max var sd color
4 1 4 11 -5.263636 -4.037175e-17 7.236364 20.338545 4.509828 #FF0000
6 2 6 7 -1.942857 -7.943233e-18 1.657143 2.112857 1.453567 #00FF00
8 3 8 14 -4.700000 1.193252e-17 4.100000 6.553846 2.560048 #0000FF
# # # Table for plot006
df_table_residuals_plot002 <- df_table_residuals_plot001
# # # Table for plot006
df_table_residuals_plot003 <- df_table_residuals_plot001
# # # Table for plot006
df_table_residuals_plot004 <- data.frame(
"variable" = "residuals",
"n" = length(minibase_mod$residuals),
"min" = min(minibase_mod$residuals),
"mean" = mean(minibase_mod$residuals),
"max" = max(minibase_mod$residuals),
"var" = var(minibase_mod$residuals),
"sd" = sd(minibase_mod$residuals),
"model_error_var_MSE" = model_error_var_MSE,
"model_error_sd" = model_error_sd
)
phrase_coment_errors <- "Model Error (MSE) "
# # # Table for plot006
df_table_residuals_plot005 <- df_table_residuals_plot004
# # # Table for plot006
df_table_residuals_plot006 <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"n" = tapply(minibase_mod$studres, minibase_mod[,2], length),
"min" = tapply(minibase_mod$studres, minibase_mod[,2], min),
"mean" = tapply(minibase_mod$studres, minibase_mod[,2], mean),
"max" = tapply(minibase_mod$studres, minibase_mod[,2], max),
"var" = tapply(minibase_mod$studres, minibase_mod[,2], var),
"sd" = tapply(minibase_mod$studres, minibase_mod[,2], sd),
"color" = df_factor_info$color
)
# # # Table for plot006
df_table_residuals_plot007 <- df_table_residuals_plot006
df_table_residuals_plot008 <- data.frame(
"variable" = "studres",
"n" = length(minibase_mod$studres),
"min" = min(minibase_mod$studres),
"mean" = mean(minibase_mod$studres),
"max" = max(minibase_mod$studres),
"var" = var(minibase_mod$studres),
"sd" = sd(minibase_mod$studres)
)
df_table_residuals_plot009 <- df_table_residuals_plot008
df_table_residuals_plot010 <- df_table_residuals_plot008
#############################################################
# # # Create a new plot...
plot001_factor <- plotly::plot_ly()
# # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
plot001_factor <- plotly::add_trace(p = plot001_factor,
type = "scatter",
mode = "markers",
x = minibase_mod$FACTOR,
y = minibase_mod$VR,
color = minibase_mod$FACTOR,
colors = df_factor_info$color,
marker = list(size = 15, opacity = 0.7))
# # # Title and settings...
plot001_factor <- plotly::layout(p = plot001_factor,
title = "Plot 001 - Scatterplot",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot001_factor <- plotly::layout(p = plot001_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Plot output
plot001_factor ##############################################################################
# # # Create a new plot...
plot002_factor <- plot_ly()
# # # Adding errors...
plot002_factor <- add_trace(p = plot002_factor,
type = "scatter",
mode = "markers",
x = df_table_factor_plot002$level,
y = df_table_factor_plot002$mean,
color = df_table_factor_plot002$level,
colors = df_table_factor_plot002$color,
marker = list(symbol = "line-ew-open",
size = 50,
opacity = 1,
line = list(width = 5)),
error_y = list(type = "data", array = df_table_factor_plot002$model_error_sd)
)
# # # Title and settings...
plot002_factor <- plotly::layout(p = plot002_factor,
title = "Plot 002 - Mean and model standard deviation",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot002_factor <-plotly::layout(p = plot002_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Plot output
plot002_factor ##############################################################################
# # # Create a new plot...
plot003_factor <- plotly::plot_ly()
# # # Adding errors...
plot003_factor <- plotly::add_trace(p = plot003_factor,
type = "scatter",
mode = "markers",
x = df_table_factor_plot003$level,
y = df_table_factor_plot003$mean,
color = df_table_factor_plot003$level,
colors = df_table_factor_plot003$color,
marker = list(symbol = "line-ew-open",
size = 50,
opacity = 1,
line = list(width = 5)),
error_y = list(type = "data", array = df_table_factor_plot003$model_error_se)
)
# # # Title and settings...
plot003_factor <- plotly::layout(p = plot003_factor,
title = "Plot 003 - Mean y model standard error",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot003_factor <-plotly::layout(p = plot003_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Plot output
plot003_factor ##############################################################################
# # # New plotly...
plot004_factor <- plotly::plot_ly()
# # # Boxplot and info...
plot004_factor <- plotly::add_trace(p = plot004_factor,
type = "box",
x = df_table_factor_plot004$level ,
color = df_table_factor_plot004$level,
colors = df_table_factor_plot004$color,
lowerfence = df_table_factor_plot004$min,
q1 = df_table_factor_plot004$Q1,
median = df_table_factor_plot004$median,
q3 = df_table_factor_plot004$Q3,
upperfence = df_table_factor_plot004$max,
boxmean = TRUE,
boxpoints = FALSE,
line = list(color = "black", width = 3)
)
# # # Title and settings...
plot004_factor <- plotly::layout(p = plot004_factor,
title = "Plot 004 - Boxplot and means",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot004_factor <- plotly::layout(p = plot004_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot004_anova...
plot004_factor ##############################################################################
all_levels <- levels(minibase_mod[,2])
n_levels <- length(all_levels)
all_color <- rainbow(length(all_levels))
plot005_factor <- plot_ly()
# Violinplot
for (k in 1:n_levels){
# Selected values
selected_level <- all_levels[k]
selected_color <- all_color[k]
dt_filas <- minibase_mod[,2] == selected_level
# Plotting selected violinplot
plot005_factor <- plot005_factor %>%
add_trace(x = minibase_mod[,2][dt_filas],
y = minibase_mod[,1][dt_filas],
type = "violin",
name = paste0("violin", k),
points = "all",
marker = list(color = selected_color),
line = list(color = selected_color),
fillcolor = I(selected_color)
)
}
# Boxplot
plot005_factor <- plotly::add_trace(p = plot005_factor,
type = "box",
name = "boxplot",
x = df_table_factor_plot005$level ,
color = df_table_factor_plot005$level ,
lowerfence = df_table_factor_plot005$min,
q1 = df_table_factor_plot005$Q1,
median = df_table_factor_plot005$median,
q3 = df_table_factor_plot005$Q3,
upperfence = df_table_factor_plot005$max,
boxmean = TRUE,
boxpoints = TRUE,
fillcolor = df_table_factor_plot005$color,
line = list(color = "black", width = 3),
opacity = 0.5,
width = 0.2)
# # # Title and settings...
plot005_factor <- plotly::layout(p = plot005_factor,
title = "Plot 005 - Violinplot",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot005_factor <- plotly::layout(p = plot005_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot005_factor ##############################################################################
#library(plotly)
plot006_factor <- plotly::plot_ly()
# Add traces
plot006_factor <- plotly::add_trace(p = plot006_factor,
type = "violin",
y = minibase_mod$VR,
x = minibase_mod$FACTOR,
showlegend = TRUE,
side = "positive",
points = "all",
name = "Violinplot",
color = minibase_mod$FACTOR,
colors = df_table_factor_plot006$color)
# # # Title and settings...
plot006_factor <- plotly::layout(p = plot006_factor,
title = "Plot 006 - Scatterplot + Jitter + Smoothed",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot006_factor <- plotly::layout(p = plot006_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot006_factor ##############################################################################
# # # Create a new plot...
plot007_factor <- plotly::plot_ly()
# # # Adding errors...
plot007_factor <- plotly::add_trace(p = plot007_factor,
type = "scatter",
mode = "markers",
x = df_table_factor_plot007$level,
y = df_table_factor_plot007$mean,
color = df_table_factor_plot007$level,
colors = df_table_factor_plot007$color,
marker = list(symbol = "line-ew-open",
size = 50,
opacity = 1,
line = list(width = 5)),
error_y = list(type = "data", array = df_table_factor_plot007$model_error_se)
)
plot007_factor <- add_text(p = plot007_factor,
x = df_table_factor_plot007$level,
y = df_table_factor_plot007$mean,
text = df_table_factor_plot007$group, name = "Tukey Group",
size = 20)
# # # Title and settings...
plot007_factor <- plotly::layout(p = plot007_factor,
title = "Plot 007 - Mean y model standard error",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot007_factor <-plotly::layout(p = plot007_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Plot output
plot007_factor #######----
####### DESDE ACAAAAAAAAAAAAAAAAAAAA
# # # Create a new plot...
plot001_residuals <- plotly::plot_ly()
# # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
plot001_residuals <- plotly::add_trace(p = plot001_residuals,
type = "scatter",
mode = "markers",
x = minibase_mod$FACTOR,
y = minibase_mod$residuals,
color = minibase_mod$FACTOR,
colors = df_factor_info$color,
marker = list(size = 15, opacity = 0.7))
# # # Title and settings...
plot001_residuals <- plotly::layout(p = plot001_residuals,
title = "Plot 001 - Scatterplot - Residuals",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot001_residuals <- plotly::layout(p = plot001_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = TRUE))
# # # Plot output
plot001_residuals #library(plotly)
plot002_residuals <- plotly::plot_ly()
# Add traces
plot002_residuals <- plotly::add_trace(p = plot002_residuals,
type = "violin",
y = minibase_mod$residuals,
x = minibase_mod$FACTOR,
showlegend = TRUE,
side = "positive",
points = "all",
name = "Violinplot",
color = minibase_mod$FACTOR,
colors = df_table_residuals_plot002$color)
# # # Title and settings...
plot002_residuals <- plotly::layout(p = plot002_residuals,
title = "Plot 002 - Residuals - Scatterplot + Jitter + Smoothed",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot002_residuals <- plotly::layout(p = plot002_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot002_residuals plot003_residuals <- plotly::plot_ly()
# Add traces
plot003_residuals <- plotly::add_trace(p = plot003_residuals,
type = "violin",
x = minibase_mod$residuals,
showlegend = TRUE,
side = "positive",
points = FALSE,
#name = levels(minibase_mod$FACTOR)[minibase_mod$lvl_order_number],
color = minibase_mod$FACTOR,
colors = df_table_residuals_plot003$color)
# # # Title and settings...
plot003_residuals <- plotly::layout(p = plot003_residuals,
title = "Plot 003 - Residuals - Scatterplot + Jitter + Smoothed",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot003residuals <- plotly::layout(p = plot003_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot003_residuals plot004_residuals <- plotly::plot_ly()
# Add traces
plot004_residuals <- plotly::add_trace(p = plot004_residuals,
type = "violin",
x = minibase_mod$residuals,
#x = minibase_mod$FACTOR,
showlegend = TRUE,
side = "positive",
points = "all",
name = " ")#
#color = minibase_mod$FACTOR,
#colors = df_table_factor_plot006$color)
# # # Title and settings...
plot004_residuals <- plotly::layout(p = plot004_residuals,
title = "Plot 004 - Residuals",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot004_residuals <- plotly::layout(p = plot004_residuals,
xaxis = list(zeroline = TRUE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot004_residuals # - el 5
qq_info <- EnvStats::qqPlot(x = minibase_mod$residuals, plot.it = F,
param.list = list(mean = mean(minibase_mod$residuals),
sd = sd(minibase_mod$residuals)))
cuantiles_teoricos <- qq_info$x
cuantiles_observados <- qq_info$y
#library(plotly)
plot005_residuals <- plotly::plot_ly()
# Crear el gráfico QQ plot
plot005_residuals <-add_trace(p = plot005_residuals,
x = cuantiles_teoricos,
y = cuantiles_observados,
type = 'scatter', mode = 'markers',
marker = list(color = 'blue'),
name = "points")
# Agregar la línea de identidad
pendiente <- 1
intercepto <- 0
# Calcular las coordenadas de los extremos de la línea de identidad
x_extremos <- range(cuantiles_teoricos)
y_extremos <- pendiente * x_extremos + intercepto
# Agregar la recta de identidad
plot005_residuals <- add_trace(p = plot005_residuals,
x = x_extremos,
y = y_extremos,
type = 'scatter',
mode = 'lines',
line = list(color = 'red'),
name = "identity")
# Establecer etiquetas de los ejes
# plot007_residuals <- layout(p = plot007_residuals,
# xaxis = list(title = 'Expected quantiles'),
# yaxis = list(title = 'Observed quantiles'))
plot005_residuals <- plotly::layout(p = plot005_residuals,
title = "Plot 005 - QQ Plot Residuals",
font = list(size = 20),
margin = list(t = 100))
# Mostrar el gráfico
plot005_residuals # - Fin el 5
# # # Create a new plot...
plot006_residuals <- plotly::plot_ly()
# # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
plot006_residuals <- plotly::add_trace(p = plot006_residuals,
type = "scatter",
mode = "markers",
x = minibase_mod$fitted.values,
y = minibase_mod$residuals,
color = minibase_mod$FACTOR,
colors = df_factor_info$color,
marker = list(size = 15, opacity = 0.7))
# # # Title and settings...
plot006_residuals <- plotly::layout(p = plot006_residuals,
title = "Plot 006 - Scatterplot - Residuals vs Fitted.values",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot006_residuals <- plotly::layout(p = plot006_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = TRUE))
# # # Plot output
plot006_residuals # # # Create a new plot...
plot007_residuals <- plotly::plot_ly()
# # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
plot007_residuals <- plotly::add_trace(p = plot007_residuals,
type = "scatter",
mode = "markers",
x = minibase_mod$FACTOR,
y = minibase_mod$studres,
color = minibase_mod$FACTOR,
colors = df_factor_info$color,
marker = list(size = 15, opacity = 0.7))
# # # Title and settings...
plot007_residuals <- plotly::layout(p = plot007_residuals,
title = "Plot 007 - Scatterplot - Studentized Residuals",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot007_residuals <- plotly::layout(p = plot007_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = TRUE))
# # # Plot output
plot007_residuals #library(plotly)
plot008_residuals <- plotly::plot_ly()
# Add traces
plot008_residuals <- plotly::add_trace(p = plot008_residuals,
type = "violin",
x = minibase_mod$studres,
#x = minibase_mod$FACTOR,
showlegend = TRUE,
side = "positive",
points = "all",
name = " ")#
#color = minibase_mod$FACTOR,
#colors = df_table_factor_plot006$color)
# # # Title and settings...
plot008_residuals <- plotly::layout(p = plot008_residuals,
title = "Plot 008 - Studentized Residuals",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot008_residuals <- plotly::layout(p = plot008_residuals,
xaxis = list(zeroline = TRUE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot008_residuals # el 9
x <- seq(-4, 4, length.out = 100)
y <- dnorm(x, mean = 0, 1)
# x <- x*model_error_sd
densidad_suavizada <- density(x, kernel = "gaussian", adjust = 0.5)
hist_data_studres <- hist(minibase_mod$studres, plot = FALSE)
hist_data_studres$"rel_frec" <- hist_data_studres$counts/sum(hist_data_studres$counts)
densidad_studres <- density(x = minibase_mod$studres, kernel = "gaussian", adjust =0.5)
#library(plotly)
plot009_residuals <- plotly::plot_ly()
# plot005_residuals <- add_trace(p = plot005_residuals,
# x = densidad_studres$x,
# y = densidad_studres$y,
# type = 'scatter',
# mode = 'lines',
# name = 'densidad_studres')
plot009_residuals <- add_trace(p = plot009_residuals,
x = x,
y = y,
type = 'scatter',
mode = 'lines',
name = 'Normal Standard')
# # Add traces
# plot005_residuals <- plotly::add_trace(p = plot005_residuals,
# type = "violin",
# x = minibase_mod$residuals,
# #x = minibase_mod$FACTOR,
# showlegend = TRUE,
# side = "positive",
# points = FALSE,
# name = "violinplot")#
plot009_residuals <- plotly::add_trace(p = plot009_residuals,
type = "bar",
x = hist_data_studres$"mids",
y = hist_data_studres$"density",
name = "hist - studres")
plot009_residuals <- plotly::layout(p = plot009_residuals,
bargap = 0)
plot009_residuals <- plotly::layout(p = plot009_residuals,
title = "Plot 009 - Studres Distribution",
font = list(size = 20),
margin = list(t = 100))
plot009_residuals # fin el 9
qq_info <- EnvStats::qqPlot(x = minibase_mod$studres, plot.it = F,
param.list = list(mean = 0,
sd = 1))
cuantiles_teoricos <- qq_info$x
cuantiles_observados <- qq_info$y
#library(plotly)
plot010_residuals <- plotly::plot_ly()
# Crear el gráfico QQ plot
plot010_residuals <-add_trace(p = plot010_residuals,
x = cuantiles_teoricos,
y = cuantiles_observados,
type = 'scatter', mode = 'markers',
marker = list(color = 'blue'),
name = "points")
# Agregar la línea de identidad
pendiente <- 1
intercepto <- 0
# Calcular las coordenadas de los extremos de la línea de identidad
x_extremos <- range(cuantiles_teoricos)
y_extremos <- pendiente * x_extremos + intercepto
# Agregar la recta de identidad
plot010_residuals <- add_trace(p = plot010_residuals,
x = x_extremos,
y = y_extremos,
type = 'scatter',
mode = 'lines',
line = list(color = 'red'),
name = "identity")
# Establecer etiquetas de los ejes
# plot007_residuals <- layout(p = plot007_residuals,
# xaxis = list(title = 'Expected quantiles'),
# yaxis = list(title = 'Observed quantiles'))
plot010_residuals <- plotly::layout(p = plot010_residuals,
title = "Plot 010 - QQ Plot - studres",
font = list(size = 20),
margin = list(t = 100))
# Mostrar el gráfico
plot010_residualsReport (Automated Statistical Assistant)
- Dataset file name: jaja
- Response Variable: mpg
- Factor: cyl
- Alpha value: 0.05
The null hypothesis of normal distribution of residuals is not rejected.
The hypothesis of homogeneity of variances (homoscedasticity) is rejected.
Not all model assumptions are met, so it is NOT valid to draw conclusions from the ANOVA test.
Regardless of the p-value obtained in ANOVA, it is not valid to draw conclusions.
1) Anova test
| test | aim | variable | p_value | alpha_value | Decision |
|---|---|---|---|---|---|
| Shapiro-Wilk test | Normality | residuals | 0.5176650 | 0.05 | Ho no rejected |
| Bartlett test | Homogeneity | residuals | 0.0150452 | 0.05 | Ho Rejected |
| Anova 1 way | Mean | mpg | 0.0000000 | 0.05 | Ho Rejected |
This section details the statistical hypotheses for the tests used to validate the assumptions and primary conclusion of the One-Way Analysis of Variance (ANOVA).
One-Way ANOVA Hypotheses
The one-way ANOVA (Analysis of Variance) test determines if there are any statistically significant differences between the means of three or more independent groups.
| Hypothesis Type | Notation | English Statement |
|---|---|---|
| Null Hypothesis | \(H_0\) | The means of all population groups are equal. |
| Alternative Hypothesis | \(H_a\) | At least one group mean is different from the others. |
Null Hypothesis Equation: \[\mu_1 = \mu_2 = \mu_3 = \dots = \mu_k\]
Shapiro-Wilk Test (Residual Normality)
The Shapiro-Wilk test is used to check if the sample data (the ANOVA residuals) follows a normal distribution, which is a required assumption for the ANOVA test.
| Hypothesis Type | Notation | English Statement |
|---|---|---|
| Null Hypothesis | \(H_0\) | The data (residuals) are drawn from a normally distributed population. |
| Alternative Hypothesis | \(H_a\) | The data (residuals) are not drawn from a normally distributed population. |
Levene’s Test (Homogeneity of Variances)
Levene’s test is used to verify the assumption of homoscedasticity ((^2_1 = ^2_2 = = ^2_k)) across the different groups being compared in the ANOVA.
| Hypothesis Type | Notation | English Statement |
|---|---|---|
| Null Hypothesis | \(H_0\) | The population variances of the groups are equal. |
| Alternative Hypothesis | \(H_a\) | The population variances of the groups are not equal. |
1) Requeriment - Normaility test - Residuals
$test_residuals_normality
Shapiro-Wilk normality test
data: minibase_mod$residuals
W = 0.97065, p-value = 0.5177
2) Requeriment - Homogeneity test - Residuals
$test_residuals_homogeneity
Bartlett test of homogeneity of variances
data: residuals by FACTOR
Bartlett's K-squared = 8.3934, df = 2, p-value = 0.01505
3) Estimated variances - Residuals
$df_model_error
order level n model_error_var_MSE model_error_sd model_error_se raw_error_se
1 1 4 11 10.38837 3.223099 0.9718008 0.9718008
2 2 6 7 10.38837 3.223099 1.2182168 1.2182168
3 3 8 14 10.38837 3.223099 0.8614094 0.8614094
$df_raw_error
order level n raw_error_var raw_error_sd
1 1 4 11 20.338545 4.509828
2 2 6 7 2.112857 1.453567
3 3 8 14 6.553846 2.560048
$phrase_info_errors
[1] "Anova and Tukey use MSE from model."
[2] "Bartlett use variance from raw error on each level."
[3] "Only if there is homogeneity from raw error variances then is a good idea take desition from MSE."
$df_table_factor_plot001
order level n mean min max sd var
4 1 4 11 26.66364 21.4 33.9 4.509828 20.338545
6 2 6 7 19.74286 17.8 21.4 1.453567 2.112857
8 3 8 14 15.10000 10.4 19.2 2.560048 6.553846
$df_table_factor_plot002
order level n mean model_error_sd lower_limit upper_limmit color
4 1 4 11 26.66364 3.223099 23.44054 29.88674 #FF0000
6 2 6 7 19.74286 3.223099 16.51976 22.96596 #00FF00
8 3 8 14 15.10000 3.223099 11.87690 18.32310 #0000FF
$df_table_factor_plot003
order level n mean model_error_se lower_limit upper_limmit color
4 1 4 11 26.66364 0.9718008 25.69184 27.63544 #FF0000
6 2 6 7 19.74286 1.2182168 18.52464 20.96107 #00FF00
8 3 8 14 15.10000 0.8614094 14.23859 15.96141 #0000FF
$df_table_factor_plot004
order level min mean Q1 median Q3 max n color
4 1 4 21.4 26.66364 22.80 26.0 30.40 33.9 11 #FF0000
6 2 6 17.8 19.74286 18.65 19.7 21.00 21.4 7 #00FF00
8 3 8 10.4 15.10000 14.40 15.2 16.25 19.2 14 #0000FF
$df_table_factor_plot005
order level min mean Q1 median Q3 max n color
4 1 4 21.4 26.66364 22.80 26.0 30.40 33.9 11 #FF0000
6 2 6 17.8 19.74286 18.65 19.7 21.00 21.4 7 #00FF00
8 3 8 10.4 15.10000 14.40 15.2 16.25 19.2 14 #0000FF
$df_table_factor_plot006
order level min mean Q1 median Q3 max n color
4 1 4 21.4 26.66364 22.80 26.0 30.40 33.9 11 #FF0000
6 2 6 17.8 19.74286 18.65 19.7 21.00 21.4 7 #00FF00
8 3 8 10.4 15.10000 14.40 15.2 16.25 19.2 14 #0000FF
order level n mean model_error_se lower_limit upper_limmit color group
4 1 4 11 26.66364 0.9718008 25.69184 27.63544 #FF0000 a
6 2 6 7 19.74286 1.2182168 18.52464 20.96107 #00FF00 b
8 3 8 14 15.10000 0.8614094 14.23859 15.96141 #0000FF c
$df_table_residuals_plot001
order level n min mean max var sd color
4 1 4 11 -5.263636 -4.037175e-17 7.236364 20.338545 4.509828 #FF0000
6 2 6 7 -1.942857 -7.943233e-18 1.657143 2.112857 1.453567 #00FF00
8 3 8 14 -4.700000 1.193252e-17 4.100000 6.553846 2.560048 #0000FF
$df_table_residuals_plot002
order level n min mean max var sd color
4 1 4 11 -5.263636 -4.037175e-17 7.236364 20.338545 4.509828 #FF0000
6 2 6 7 -1.942857 -7.943233e-18 1.657143 2.112857 1.453567 #00FF00
8 3 8 14 -4.700000 1.193252e-17 4.100000 6.553846 2.560048 #0000FF
$df_table_residuals_plot003
order level n min mean max var sd color
4 1 4 11 -5.263636 -4.037175e-17 7.236364 20.338545 4.509828 #FF0000
6 2 6 7 -1.942857 -7.943233e-18 1.657143 2.112857 1.453567 #00FF00
8 3 8 14 -4.700000 1.193252e-17 4.100000 6.553846 2.560048 #0000FF
$df_table_residuals_plot004
variable n min mean max var sd
1 residuals 32 -5.263636 -1.040834e-17 7.236364 9.718148 3.117394
model_error_var_MSE model_error_sd
1 10.38837 3.223099
$df_table_residuals_plot005
variable n min mean max var sd
1 residuals 32 -5.263636 -1.040834e-17 7.236364 9.718148 3.117394
model_error_var_MSE model_error_sd
1 10.38837 3.223099
$df_table_residuals_plot006
order level n min mean max var sd color
4 1 4 11 -1.6330981 -2.020559e-17 2.2451573 1.9578196 1.3992211 #FF0000
6 2 6 7 -0.6027917 -2.723816e-18 0.5141459 0.2033869 0.4509843 #00FF00
8 3 8 14 -1.4582240 -1.256150e-18 1.2720678 0.6308833 0.7942816 #0000FF
$df_table_residuals_plot007
order level n min mean max var sd color
4 1 4 11 -1.6330981 -2.020559e-17 2.2451573 1.9578196 1.3992211 #FF0000
6 2 6 7 -0.6027917 -2.723816e-18 0.5141459 0.2033869 0.4509843 #00FF00
8 3 8 14 -1.4582240 -1.256150e-18 1.2720678 0.6308833 0.7942816 #0000FF
$df_table_residuals_plot008
variable n min mean max var sd
1 studres 32 -1.633098 -8.104411e-18 2.245157 0.9354839 0.9672042
$df_table_residuals_plot009
variable n min mean max var sd
1 studres 32 -1.633098 -8.104411e-18 2.245157 0.9354839 0.9672042
$df_table_residuals_plot010
variable n min mean max var sd
1 studres 32 -1.633098 -8.104411e-18 2.245157 0.9354839 0.9672042
### INIT CODE ###
# # # # # Section 01 - Libraries ---------------------------------------------
library("plotly")
library("htmlwidgets")
library("knitr")
library("agricolae") # Tukey test
library("dplyr") # Developing with %>%
# Import files from xlsx
library("plotly") # Advanced graphical functions
my_dataset <- mtcars
var_name_rv <- "mpg"
var_name_factor <- "cyl"
alpha_value <- 0.05
# # # # # Section 02 - Import dataset ----------------------------------------
#---my_dataset <- _A_my_import_sentence_A_
head(x = my_dataset, n = 5)
# # # # # Section 03 - Settings ----------------------------------------------
#---var_name_rv <- "_B_var_name_rv_B_"
#---var_name_factor <- "_B_var_name_factor_B_"
#---alpha_value <- _B_alpha_value_B_
# # # # # Section 04 - Standard actions --------------------------------------
# The factor must be factor data type on R.
my_dataset[,var_name_factor] <- as.factor(as.character(my_dataset[,var_name_factor]))
# # # # # Section 05 - Alpha and confidence value ----------------------------
confidence_value <- 1 - alpha_value
df_alpha_confidence <- data.frame(
"order" = 1:2,
"detail" = c("alpha value", "confidence value"),
"probability" = c(alpha_value, confidence_value),
"percentaje" = paste0(c(alpha_value, confidence_value)*100, "%")
)
df_alpha_confidence
# # # # # Section 06 - Selected variables and roles -------------------------
vector_all_var_names <- colnames(my_dataset)
vector_name_selected_vars <- c(var_name_rv, var_name_factor)
vector_rol_vars <- c("VR", "FACTOR")
df_selected_vars <- data.frame(
"order" = 1:length(vector_name_selected_vars),
"var_name" = vector_name_selected_vars,
"var_number" = match(vector_name_selected_vars, vector_all_var_names),
"var_letter" = openxlsx::int2col(match(vector_name_selected_vars, vector_all_var_names)),
"var_role" = vector_rol_vars,
"doble_reference" = paste0(vector_rol_vars, "(", vector_name_selected_vars, ")")
)
df_selected_vars
# # # # # Section 05 - minibase ------------------------------------------------
# Only selected variabless.
# Only completed rows.
# Factor columns as factor object in R.
minibase <- na.omit(my_dataset[vector_name_selected_vars])
colnames(minibase) <- vector_rol_vars
minibase[,"FACTOR"] <- as.factor(minibase[,"FACTOR"])
head(x = minibase, n = 5)
# # # Anova control
# 'VR' must be numeric and 'FACTOR must be factor.
df_control_minibase <- data.frame(
"order" = 1:nrow(df_selected_vars),
"var_name" = df_selected_vars$"var_name",
"var_role" = df_selected_vars$"var_role",
"control" = c("is.numeric()", "is.factor()"),
"verify" = c(is.numeric(minibase[,"VR"]), is.factor(minibase[,"FACTOR"]))
)
df_control_minibase
# # # my_dataset and minibase reps
# Our 'n' is from minibase
df_show_n <- data.frame(
"object" = c("my_dataset", "minibase"),
"n_col" = c(ncol(my_dataset), ncol(minibase)),
"n_row" = c(nrow(my_dataset), nrow(minibase))
)
df_show_n
# # # Factor info
# Default order for levels its alphabetic order.
df_factor_info <- data.frame(
"order" = 1:nlevels(minibase[,2]),
"level" = levels(minibase[,2]),
"n" = as.vector(table(minibase[,2])),
"mean" = tapply(minibase[,1], minibase[,2], mean),
"color" = rainbow(nlevels(minibase[,2]))
)
rownames(df_factor_info) <- 1:nrow(df_factor_info)
df_factor_info
# # # Unbalanced reps for levels?
# Important information for Tukey.
# If reps its equal or not equal in all levels must be detailled
# on Tukey.
check_unbalanced_reps <- length(unique(df_factor_info$n)) > 1
check_unbalanced_reps
phrase_yes_unbalanced <- "The design is unbalanced in repetitions. A correction is applied to the Tukey test."
phrase_no_unbalanced <- "The design is unbalanced in replicates. A correction should be applied to the Tukey test."
phrase_selected_check_unbalanced <- ifelse(test = check_unbalanced_reps,
yes = phrase_yes_unbalanced,
no = phrase_no_unbalanced)
phrase_selected_check_unbalanced
# # # # # Section 06 - Anova Test ----------------------------------------------
# # # Anova test
lm_anova <- lm(VR ~ FACTOR, data = minibase) # Linear model
aov_anova <- aov(lm_anova) # R results for anova
df_table_anova <- as.data.frame(summary(aov_anova)[[1]]) # Common anova table
df_table_anova
# # # Standard error from model for each level
model_error_var_MSE <- df_table_anova$`Mean Sq`[2]
model_error_sd <- sqrt(model_error_var_MSE)
df_model_error <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"model_error_var_MSE" = model_error_var_MSE,
"model_error_sd" = model_error_sd
)
df_model_error["model_error_se"] <- df_model_error["model_error_sd"]/sqrt(df_model_error$n)
df_model_error
# # # # # Section 07 - minibase_mod --------------------------------------------
# # # Detect rows on my_dataset there are on minibase
dt_rows_my_dataset_ok <- rowSums(!is.na(my_dataset[vector_name_selected_vars])) == ncol(minibase)
# # # Object minibase_mod and new cols
minibase_mod <- minibase
minibase_mod$"lvl_order_number" <- as.numeric(minibase_mod[,2])
minibase_mod$"lvl_color" <- df_factor_info$color[minibase_mod$"lvl_order_number"]
minibase_mod$"fitted.values" <- df_factor_info$"mean"[minibase_mod$"lvl_order_number"]
minibase_mod$"residuals" <- lm_anova$residuals
minibase_mod$"id_my_dataset" <- c(1:nrow(my_dataset))[dt_rows_my_dataset_ok]
minibase_mod$"id_minibase" <- 1:nrow(minibase)
minibase_mod$"studres" <- minibase_mod$"residuals"/model_error_sd
# # # # # Section 08 - Requeriments for residuals-------------------------------
# # # Normality test (Shapiro-Wilk)
test_residuals_normality <- shapiro.test(minibase_mod$residuals)
test_residuals_normality
# # # Homogeinidy test (Bartlett)
test_residuals_homogeneity <- bartlett.test(residuals ~ FACTOR, data = minibase_mod)
test_residuals_homogeneity
# # # Residuals variance from levels from original residuals
df_raw_error <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"n" = tapply(minibase_mod$residuals, minibase_mod[,2], length),
"raw_error_var" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
"raw_error_sd" = tapply(minibase_mod$residuals, minibase_mod[,2], sd)
)
df_model_error["raw_error_se"] <- df_model_error["model_error_sd"]/sqrt(df_model_error$"n")
rownames(df_raw_error) <- 1:nrow(df_raw_error)
df_raw_error
phrase_info_errors <- c("Anova and Tukey use MSE from model.",
"Bartlett use variance from raw error on each level.",
"Only if there is homogeneity from raw error variances then is a good idea take desition from MSE.")
phrase_info_errors
# # # Sum for residuals
sum_residuals <- sum(minibase_mod$residuals)
sum_residuals
# # # Mean for residuals
mean_residuals <- mean(minibase_mod$residuals)
mean_residuals
##################################
p_value_shapiro <- test_residuals_normality$p.value
p_value_bartlett <- test_residuals_homogeneity$p.value
p_value_anova <- df_table_anova$"Pr(>F)"[1]
vector_p_value <- c(p_value_shapiro, p_value_bartlett, p_value_anova)
vector_logic_rejected <- vector_p_value < alpha_value
vector_ho_decision <- ifelse(test = vector_logic_rejected, yes = "Ho Rejected", "Ho no rejected")
vector_ho_rejected <- ifelse(test = vector_logic_rejected, yes = "Yes", "No")
df_summary_anova <- data.frame(
"test" = c("Shapiro-Wilk test", "Bartlett test", "Anova 1 way"),
"aim" = c("Normality", "Homogeneity", "Mean"),
"variable" = c("residuals", "residuals", var_name_rv),
"p_value" = vector_p_value,
"alpha_value" = c(alpha_value, alpha_value, alpha_value),
"Decision" = vector_ho_decision
)
df_summary_anova
check_shapiro_rejected <- p_value_shapiro < alpha_value
phrase_shapiro_yes_rejected <- "The null hypothesis of normal distribution of residuals is rejected."
phrase_shapiro_no_rejected <- "The null hypothesis of normal distribution of residuals is not rejected."
phrase_shapiro_selected <- ifelse(test = check_shapiro_rejected,
yes = phrase_shapiro_yes_rejected,
no = phrase_shapiro_no_rejected)
phrase_shapiro_selected
check_bartlett_rejected <- p_value_bartlett < alpha_value
phrase_bartlett_yes_rejected <- "The hypothesis of homogeneity of variances (homoscedasticity) is rejected."
phrase_bartlett_no_rejected <- "The hypothesis of homogeneity of variances (homoscedasticity) is not rejected."
phrase_bartlett_selected <- ifelse(test = check_bartlett_rejected,
yes = phrase_bartlett_yes_rejected,
no = phrase_bartlett_no_rejected)
phrase_bartlett_selected
check_ok_all_requeriments <- sum(vector_logic_rejected[c(1,2)]) == 2
phrase_requeriments_yes_valid <- "All residual assumptions are met, so it is valid to draw conclusions from the ANOVA test."
phrase_requeriments_no_valid <- "Not all model assumptions are met, so it is NOT valid to draw conclusions from the ANOVA test."
phrase_requeriments_selected <- ifelse(test = check_ok_all_requeriments,
yes = phrase_requeriments_yes_valid,
no = phrase_requeriments_no_valid)
phrase_requeriments_selected
check_anova_rejected <- p_value_anova < alpha_value
phrase_anova_yes_rejected <- "The null hypothesis of the ANOVA test is rejected. There are statistically significant differences in at least one level of the factor."
phrase_anova_no_rejected <- "The null hypothesis of the ANOVA test is not rejected. All levels of the factor are statistically equal."
phrase_anova_selected <- ifelse(test = check_anova_rejected,
yes = phrase_anova_yes_rejected,
no = phrase_anova_no_rejected)
phrase_anova_selected <- ifelse(
test = check_ok_all_requeriments,
yes = phrase_anova_selected,
no = "Regardless of the p-value obtained in ANOVA, it is not valid to draw conclusions."
)
phrase_anova_selected
##############################################################################
tukey01_full_groups <- agricolae::HSD.test(y = lm_anova,
trt = colnames(minibase_mod)[2],
alpha = alpha_value,
group = TRUE,
console = FALSE,
unbalanced = check_unbalanced_reps)
# # # Tukey test - Tukey pairs comparation - Full version
tukey02_full_pairs <- agricolae::HSD.test(y = lm_anova,
trt = colnames(minibase_mod)[2],
alpha = alpha_value,
group = FALSE,
console = FALSE,
unbalanced = check_unbalanced_reps)
# # Original table from R about Tukey
df_tukey_original_table <- tukey01_full_groups$groups
df_tukey_original_table
# # # New table about Tukey
df_tukey_table <- data.frame(
"level" = rownames(tukey01_full_groups$groups),
"mean" = tukey01_full_groups$groups[,1],
"group" = tukey01_full_groups$groups[,2]
)
df_tukey_table
# # # # # Section 10 - Partitioned Measures (VR)--------------------------------
# # # Partitioned Measures of Position (VR)
df_vr_position_levels <- data.frame(
"order" = 1:nlevels(minibase[,2]),
"level" = levels(minibase[,2]),
"min" = tapply(minibase[,1], minibase[,2], min),
"mean" = tapply(minibase[,1], minibase[,2], mean),
"Q1" = tapply(minibase[,1], minibase[,2], quantile, 0.25),
"median" = tapply(minibase[,1], minibase[,2], median),
"Q3" = tapply(minibase[,1], minibase[,2], quantile, 0.75),
"max" = tapply(minibase[,1], minibase[,2], max),
"n" = tapply(minibase[,1], minibase[,2], length)
)
# # # Partitioned Measures of Dispersion (VR)
df_vr_dispersion_levels <- data.frame(
"order" = 1:nlevels(minibase[,2]),
"level" = levels(minibase[,2]),
"range" = tapply(minibase[,1], minibase[,2], function(x){max(x) - min(x)}),
"variance" = tapply(minibase[,1], minibase[,2], var),
"standard_deviation" = tapply(minibase[,1], minibase[,2], sd),
"standard_error" = tapply(minibase[,1], minibase[,2], function(x){sd(x)/sqrt(length(x))}),
"n" = tapply(minibase[,1], minibase[,2], length)
)
df_vr_dispersion_levels
# # # General Measures of Position (VR)
df_vr_position_general <- data.frame(
"min" = min(minibase[,1]),
"mean" = mean(minibase[,1]),
"median" = median(minibase[,1]),
"max" = max(minibase[,1]),
"n" = length(minibase[,1])
)
df_vr_position_general
# # # General Measures of Dispersion (VR)
df_vr_dispersion_general <- data.frame(
"range" = max(minibase[,1]) - min(minibase[,1]),
"variance" = var(minibase[,1]),
"standard_deviation" = sd(minibase[,1]),
"standard_error" = sd(minibase[,1])/(sqrt(length(minibase[,1]))),
"n" = length(minibase[,1])
)
df_vr_dispersion_general
# # # # # Section 11 - Partitioned Measures (Residuals)-------------------------
# # # Partitioned Measures of Position (residuals)
df_residuals_position_levels <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"min" = tapply(minibase_mod$residuals, minibase_mod[,2], min),
"mean" = tapply(minibase_mod$residuals, minibase_mod[,2], mean),
"median" = tapply(minibase_mod$residuals, minibase_mod[,2], median),
"max" = tapply(minibase_mod$residuals, minibase_mod[,2], max),
"n" = tapply(minibase_mod$residuals, minibase_mod[,2], length)
)
df_residuals_position_levels
# # # Partitioned Measures of Dispersion (residuals)
df_residual_dispersion_levels <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"range" = tapply(minibase_mod$residuals, minibase_mod[,2], function(x){max(x) - min(x)}),
"variance" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
"standard_deviation" = tapply(minibase_mod$residuals, minibase_mod[,2], sd),
"standard_error" = tapply(minibase_mod$residuals, minibase_mod[,2], function(x){sd(x)/sqrt(length(x))}),
"n" = tapply(minibase_mod$residuals, minibase_mod[,2], length)
)
df_residual_dispersion_levels
# # # General Measures of Position (residuals)
df_residuals_position_general <- data.frame(
"min" = min(minibase_mod$residuals),
"mean" = mean(minibase_mod$residuals),
"median" = median(minibase_mod$residuals),
"max" = max(minibase_mod$residuals),
"n" = length(minibase_mod$residuals)
)
df_residuals_position_general
# # # General Measures of Dispersion (residuals)
df_residuals_dispersion_general <- data.frame(
"range" = max(minibase_mod$residuals) - min(minibase_mod$residuals),
"variance" = var(minibase_mod$residuals),
"standard_deviation" = sd(minibase_mod$residuals),
"standard_error" = sd(minibase_mod$residuals)/(sqrt(length(minibase_mod$residuals))),
"n" = length(minibase_mod$residuals)
)
df_residuals_dispersion_general
# # # # # Section 12 - Model estimators ----------------------------------------
# # # Means for each level
vector_est_mu_i <- df_vr_position_levels$mean
vector_est_mu_i
# # # Mean of means
est_mu <- mean(vector_est_mu_i)
vector_est_mu <- rep(est_mu, length(vector_est_mu_i))
vector_est_mu
# # # Tau efects
vector_est_tau_i <- vector_est_mu_i - vector_est_mu
vector_est_tau_i
# # # Sum of tau efects
sum_est_tau_i <- sum(vector_est_tau_i)
sum_est_tau_i
# # # Long model information on dataframe
df_anova1way_model_long <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"est_mu" = vector_est_mu,
"est_tau_i" = vector_est_tau_i
)
df_anova1way_model_long
# # # Short model information on dataframe
df_anova1way_model_short <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"est_mu_i" = vector_est_mu_i
)
df_anova1way_model_short
# # # # # Section 13 - Special table to plots ----------------------------------
# # # Table for plot001
df_table_factor_plot001 <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"mean" = tapply(minibase[,1], minibase[,2], mean),
"min" = tapply(minibase[,1], minibase[,2], min),
"max" = tapply(minibase[,1], minibase[,2], max),
"sd" = tapply(minibase[,1], minibase[,2], sd),
"var" = tapply(minibase[,1], minibase[,2], var)
)
df_table_factor_plot002 <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"mean" = tapply(minibase[,1], minibase[,2], mean),
"model_error_sd" = df_model_error$model_error_sd
)
df_table_factor_plot002["lower_limit"] <- df_table_factor_plot002$mean - df_table_factor_plot002$model_error_sd
df_table_factor_plot002["upper_limmit"] <- df_table_factor_plot002$mean + df_table_factor_plot002$model_error_sd
df_table_factor_plot002["color"] <- df_factor_info$color
df_table_factor_plot002
df_table_factor_plot003 <- data.frame(
"order" = df_factor_info$order,
"level" = df_factor_info$level,
"n" = df_factor_info$n,
"mean" = tapply(minibase[,1], minibase[,2], mean),
"model_error_se" = df_model_error$model_error_se
)
df_table_factor_plot003["lower_limit"] <- df_table_factor_plot003$mean - df_table_factor_plot003$model_error_se
df_table_factor_plot003["upper_limmit"] <- df_table_factor_plot003$mean + df_table_factor_plot003$model_error_se
df_table_factor_plot003["color"] <- df_factor_info$color
df_table_factor_plot003
# # # Table for plot004
df_table_factor_plot004 <- df_vr_position_levels
df_table_factor_plot004["color"] <- df_factor_info$color
# # # Table for plot005
df_table_factor_plot005 <- df_table_factor_plot004
# # # Table for plot006
df_table_factor_plot006 <- df_table_factor_plot004
df_table_factor_plot007 <- df_table_factor_plot003
correct_pos_letters <- order(df_tukey_table$level)
vector_letters <- df_tukey_table$group[correct_pos_letters]
df_table_factor_plot007["group"] <- vector_letters
# # # Table for plot006
df_table_residuals_plot001 <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"n" = tapply(minibase_mod$residuals, minibase_mod[,2], length),
"min" = tapply(minibase_mod$residuals, minibase_mod[,2], min),
"mean" = tapply(minibase_mod$residuals, minibase_mod[,2], mean),
"max" = tapply(minibase_mod$residuals, minibase_mod[,2], max),
"var" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
"sd" = tapply(minibase_mod$residuals, minibase_mod[,2], sd),
"color" = df_factor_info$color
)
df_table_residuals_plot001
# # # Table for plot006
df_table_residuals_plot002 <- df_table_residuals_plot001
# # # Table for plot006
df_table_residuals_plot003 <- df_table_residuals_plot001
# # # Table for plot006
df_table_residuals_plot004 <- data.frame(
"variable" = "residuals",
"n" = length(minibase_mod$residuals),
"min" = min(minibase_mod$residuals),
"mean" = mean(minibase_mod$residuals),
"max" = max(minibase_mod$residuals),
"var" = var(minibase_mod$residuals),
"sd" = sd(minibase_mod$residuals),
"model_error_var_MSE" = model_error_var_MSE,
"model_error_sd" = model_error_sd
)
phrase_coment_errors <- "Model Error (MSE) "
# # # Table for plot006
df_table_residuals_plot005 <- df_table_residuals_plot004
# # # Table for plot006
df_table_residuals_plot006 <- data.frame(
"order" = 1:nlevels(minibase_mod[,2]),
"level" = levels(minibase_mod[,2]),
"n" = tapply(minibase_mod$studres, minibase_mod[,2], length),
"min" = tapply(minibase_mod$studres, minibase_mod[,2], min),
"mean" = tapply(minibase_mod$studres, minibase_mod[,2], mean),
"max" = tapply(minibase_mod$studres, minibase_mod[,2], max),
"var" = tapply(minibase_mod$studres, minibase_mod[,2], var),
"sd" = tapply(minibase_mod$studres, minibase_mod[,2], sd),
"color" = df_factor_info$color
)
# # # Table for plot006
df_table_residuals_plot007 <- df_table_residuals_plot006
df_table_residuals_plot008 <- data.frame(
"variable" = "studres",
"n" = length(minibase_mod$studres),
"min" = min(minibase_mod$studres),
"mean" = mean(minibase_mod$studres),
"max" = max(minibase_mod$studres),
"var" = var(minibase_mod$studres),
"sd" = sd(minibase_mod$studres)
)
df_table_residuals_plot009 <- df_table_residuals_plot008
df_table_residuals_plot010 <- df_table_residuals_plot008
#############################################################
# # # Create a new plot...
plot001_factor <- plotly::plot_ly()
# # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
plot001_factor <- plotly::add_trace(p = plot001_factor,
type = "scatter",
mode = "markers",
x = minibase_mod$FACTOR,
y = minibase_mod$VR,
color = minibase_mod$FACTOR,
colors = df_factor_info$color,
marker = list(size = 15, opacity = 0.7))
# # # Title and settings...
plot001_factor <- plotly::layout(p = plot001_factor,
title = "Plot 001 - Scatterplot",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot001_factor <- plotly::layout(p = plot001_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Plot output
plot001_factor
##############################################################################
# # # Create a new plot...
plot002_factor <- plot_ly()
# # # Adding errors...
plot002_factor <- add_trace(p = plot002_factor,
type = "scatter",
mode = "markers",
x = df_table_factor_plot002$level,
y = df_table_factor_plot002$mean,
color = df_table_factor_plot002$level,
colors = df_table_factor_plot002$color,
marker = list(symbol = "line-ew-open",
size = 50,
opacity = 1,
line = list(width = 5)),
error_y = list(type = "data", array = df_table_factor_plot002$model_error_sd)
)
# # # Title and settings...
plot002_factor <- plotly::layout(p = plot002_factor,
title = "Plot 002 - Mean and model standard deviation",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot002_factor <-plotly::layout(p = plot002_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Plot output
plot002_factor
##############################################################################
# # # Create a new plot...
plot003_factor <- plotly::plot_ly()
# # # Adding errors...
plot003_factor <- plotly::add_trace(p = plot003_factor,
type = "scatter",
mode = "markers",
x = df_table_factor_plot003$level,
y = df_table_factor_plot003$mean,
color = df_table_factor_plot003$level,
colors = df_table_factor_plot003$color,
marker = list(symbol = "line-ew-open",
size = 50,
opacity = 1,
line = list(width = 5)),
error_y = list(type = "data", array = df_table_factor_plot003$model_error_se)
)
# # # Title and settings...
plot003_factor <- plotly::layout(p = plot003_factor,
title = "Plot 003 - Mean y model standard error",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot003_factor <-plotly::layout(p = plot003_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Plot output
plot003_factor
##############################################################################
# # # New plotly...
plot004_factor <- plotly::plot_ly()
# # # Boxplot and info...
plot004_factor <- plotly::add_trace(p = plot004_factor,
type = "box",
x = df_table_factor_plot004$level ,
color = df_table_factor_plot004$level,
colors = df_table_factor_plot004$color,
lowerfence = df_table_factor_plot004$min,
q1 = df_table_factor_plot004$Q1,
median = df_table_factor_plot004$median,
q3 = df_table_factor_plot004$Q3,
upperfence = df_table_factor_plot004$max,
boxmean = TRUE,
boxpoints = FALSE,
line = list(color = "black", width = 3)
)
# # # Title and settings...
plot004_factor <- plotly::layout(p = plot004_factor,
title = "Plot 004 - Boxplot and means",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot004_factor <- plotly::layout(p = plot004_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot004_anova...
plot004_factor
##############################################################################
all_levels <- levels(minibase_mod[,2])
n_levels <- length(all_levels)
all_color <- rainbow(length(all_levels))
plot005_factor <- plot_ly()
# Violinplot
for (k in 1:n_levels){
# Selected values
selected_level <- all_levels[k]
selected_color <- all_color[k]
dt_filas <- minibase_mod[,2] == selected_level
# Plotting selected violinplot
plot005_factor <- plot005_factor %>%
add_trace(x = minibase_mod[,2][dt_filas],
y = minibase_mod[,1][dt_filas],
type = "violin",
name = paste0("violin", k),
points = "all",
marker = list(color = selected_color),
line = list(color = selected_color),
fillcolor = I(selected_color)
)
}
# Boxplot
plot005_factor <- plotly::add_trace(p = plot005_factor,
type = "box",
name = "boxplot",
x = df_table_factor_plot005$level ,
color = df_table_factor_plot005$level ,
lowerfence = df_table_factor_plot005$min,
q1 = df_table_factor_plot005$Q1,
median = df_table_factor_plot005$median,
q3 = df_table_factor_plot005$Q3,
upperfence = df_table_factor_plot005$max,
boxmean = TRUE,
boxpoints = TRUE,
fillcolor = df_table_factor_plot005$color,
line = list(color = "black", width = 3),
opacity = 0.5,
width = 0.2)
# # # Title and settings...
plot005_factor <- plotly::layout(p = plot005_factor,
title = "Plot 005 - Violinplot",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot005_factor <- plotly::layout(p = plot005_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot005_factor
##############################################################################
#library(plotly)
plot006_factor <- plotly::plot_ly()
# Add traces
plot006_factor <- plotly::add_trace(p = plot006_factor,
type = "violin",
y = minibase_mod$VR,
x = minibase_mod$FACTOR,
showlegend = TRUE,
side = "positive",
points = "all",
name = "Violinplot",
color = minibase_mod$FACTOR,
colors = df_table_factor_plot006$color)
# # # Title and settings...
plot006_factor <- plotly::layout(p = plot006_factor,
title = "Plot 006 - Scatterplot + Jitter + Smoothed",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot006_factor <- plotly::layout(p = plot006_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot006_factor
##############################################################################
# # # Create a new plot...
plot007_factor <- plotly::plot_ly()
# # # Adding errors...
plot007_factor <- plotly::add_trace(p = plot007_factor,
type = "scatter",
mode = "markers",
x = df_table_factor_plot007$level,
y = df_table_factor_plot007$mean,
color = df_table_factor_plot007$level,
colors = df_table_factor_plot007$color,
marker = list(symbol = "line-ew-open",
size = 50,
opacity = 1,
line = list(width = 5)),
error_y = list(type = "data", array = df_table_factor_plot007$model_error_se)
)
plot007_factor <- add_text(p = plot007_factor,
x = df_table_factor_plot007$level,
y = df_table_factor_plot007$mean,
text = df_table_factor_plot007$group, name = "Tukey Group",
size = 20)
# # # Title and settings...
plot007_factor <- plotly::layout(p = plot007_factor,
title = "Plot 007 - Mean y model standard error",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot007_factor <-plotly::layout(p = plot007_factor,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Plot output
plot007_factor
#######----
####### DESDE ACAAAAAAAAAAAAAAAAAAAA
# # # Create a new plot...
plot001_residuals <- plotly::plot_ly()
# # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
plot001_residuals <- plotly::add_trace(p = plot001_residuals,
type = "scatter",
mode = "markers",
x = minibase_mod$FACTOR,
y = minibase_mod$residuals,
color = minibase_mod$FACTOR,
colors = df_factor_info$color,
marker = list(size = 15, opacity = 0.7))
# # # Title and settings...
plot001_residuals <- plotly::layout(p = plot001_residuals,
title = "Plot 001 - Scatterplot - Residuals",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot001_residuals <- plotly::layout(p = plot001_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = TRUE))
# # # Plot output
plot001_residuals
#library(plotly)
plot002_residuals <- plotly::plot_ly()
# Add traces
plot002_residuals <- plotly::add_trace(p = plot002_residuals,
type = "violin",
y = minibase_mod$residuals,
x = minibase_mod$FACTOR,
showlegend = TRUE,
side = "positive",
points = "all",
name = "Violinplot",
color = minibase_mod$FACTOR,
colors = df_table_residuals_plot002$color)
# # # Title and settings...
plot002_residuals <- plotly::layout(p = plot002_residuals,
title = "Plot 002 - Residuals - Scatterplot + Jitter + Smoothed",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot002_residuals <- plotly::layout(p = plot002_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot002_residuals
plot003_residuals <- plotly::plot_ly()
# Add traces
plot003_residuals <- plotly::add_trace(p = plot003_residuals,
type = "violin",
x = minibase_mod$residuals,
showlegend = TRUE,
side = "positive",
points = FALSE,
#name = levels(minibase_mod$FACTOR)[minibase_mod$lvl_order_number],
color = minibase_mod$FACTOR,
colors = df_table_residuals_plot003$color)
# # # Title and settings...
plot003_residuals <- plotly::layout(p = plot003_residuals,
title = "Plot 003 - Residuals - Scatterplot + Jitter + Smoothed",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot003residuals <- plotly::layout(p = plot003_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot003_residuals
plot004_residuals <- plotly::plot_ly()
# Add traces
plot004_residuals <- plotly::add_trace(p = plot004_residuals,
type = "violin",
x = minibase_mod$residuals,
#x = minibase_mod$FACTOR,
showlegend = TRUE,
side = "positive",
points = "all",
name = " ")#
#color = minibase_mod$FACTOR,
#colors = df_table_factor_plot006$color)
# # # Title and settings...
plot004_residuals <- plotly::layout(p = plot004_residuals,
title = "Plot 004 - Residuals",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot004_residuals <- plotly::layout(p = plot004_residuals,
xaxis = list(zeroline = TRUE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot004_residuals
# - el 5
qq_info <- EnvStats::qqPlot(x = minibase_mod$residuals, plot.it = F,
param.list = list(mean = mean(minibase_mod$residuals),
sd = sd(minibase_mod$residuals)))
cuantiles_teoricos <- qq_info$x
cuantiles_observados <- qq_info$y
#library(plotly)
plot005_residuals <- plotly::plot_ly()
# Crear el gráfico QQ plot
plot005_residuals <-add_trace(p = plot005_residuals,
x = cuantiles_teoricos,
y = cuantiles_observados,
type = 'scatter', mode = 'markers',
marker = list(color = 'blue'),
name = "points")
# Agregar la línea de identidad
pendiente <- 1
intercepto <- 0
# Calcular las coordenadas de los extremos de la línea de identidad
x_extremos <- range(cuantiles_teoricos)
y_extremos <- pendiente * x_extremos + intercepto
# Agregar la recta de identidad
plot005_residuals <- add_trace(p = plot005_residuals,
x = x_extremos,
y = y_extremos,
type = 'scatter',
mode = 'lines',
line = list(color = 'red'),
name = "identity")
# Establecer etiquetas de los ejes
# plot007_residuals <- layout(p = plot007_residuals,
# xaxis = list(title = 'Expected quantiles'),
# yaxis = list(title = 'Observed quantiles'))
plot005_residuals <- plotly::layout(p = plot005_residuals,
title = "Plot 005 - QQ Plot Residuals",
font = list(size = 20),
margin = list(t = 100))
# Mostrar el gráfico
plot005_residuals
# - Fin el 5
# # # Create a new plot...
plot006_residuals <- plotly::plot_ly()
# # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
plot006_residuals <- plotly::add_trace(p = plot006_residuals,
type = "scatter",
mode = "markers",
x = minibase_mod$fitted.values,
y = minibase_mod$residuals,
color = minibase_mod$FACTOR,
colors = df_factor_info$color,
marker = list(size = 15, opacity = 0.7))
# # # Title and settings...
plot006_residuals <- plotly::layout(p = plot006_residuals,
title = "Plot 006 - Scatterplot - Residuals vs Fitted.values",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot006_residuals <- plotly::layout(p = plot006_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = TRUE))
# # # Plot output
plot006_residuals
# # # Create a new plot...
plot007_residuals <- plotly::plot_ly()
# # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
plot007_residuals <- plotly::add_trace(p = plot007_residuals,
type = "scatter",
mode = "markers",
x = minibase_mod$FACTOR,
y = minibase_mod$studres,
color = minibase_mod$FACTOR,
colors = df_factor_info$color,
marker = list(size = 15, opacity = 0.7))
# # # Title and settings...
plot007_residuals <- plotly::layout(p = plot007_residuals,
title = "Plot 007 - Scatterplot - Studentized Residuals",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines
plot007_residuals <- plotly::layout(p = plot007_residuals,
xaxis = list(zeroline = FALSE),
yaxis = list(zeroline = TRUE))
# # # Plot output
plot007_residuals
#library(plotly)
plot008_residuals <- plotly::plot_ly()
# Add traces
plot008_residuals <- plotly::add_trace(p = plot008_residuals,
type = "violin",
x = minibase_mod$studres,
#x = minibase_mod$FACTOR,
showlegend = TRUE,
side = "positive",
points = "all",
name = " ")#
#color = minibase_mod$FACTOR,
#colors = df_table_factor_plot006$color)
# # # Title and settings...
plot008_residuals <- plotly::layout(p = plot008_residuals,
title = "Plot 008 - Studentized Residuals",
font = list(size = 20),
margin = list(t = 100))
# # # Without zerolines...
plot008_residuals <- plotly::layout(p = plot008_residuals,
xaxis = list(zeroline = TRUE),
yaxis = list(zeroline = FALSE))
# # # Output plot003_anova...
plot008_residuals
# el 9
x <- seq(-4, 4, length.out = 100)
y <- dnorm(x, mean = 0, 1)
# x <- x*model_error_sd
densidad_suavizada <- density(x, kernel = "gaussian", adjust = 0.5)
hist_data_studres <- hist(minibase_mod$studres, plot = FALSE)
hist_data_studres$"rel_frec" <- hist_data_studres$counts/sum(hist_data_studres$counts)
densidad_studres <- density(x = minibase_mod$studres, kernel = "gaussian", adjust =0.5)
#library(plotly)
plot009_residuals <- plotly::plot_ly()
# plot005_residuals <- add_trace(p = plot005_residuals,
# x = densidad_studres$x,
# y = densidad_studres$y,
# type = 'scatter',
# mode = 'lines',
# name = 'densidad_studres')
plot009_residuals <- add_trace(p = plot009_residuals,
x = x,
y = y,
type = 'scatter',
mode = 'lines',
name = 'Normal Standard')
# # Add traces
# plot005_residuals <- plotly::add_trace(p = plot005_residuals,
# type = "violin",
# x = minibase_mod$residuals,
# #x = minibase_mod$FACTOR,
# showlegend = TRUE,
# side = "positive",
# points = FALSE,
# name = "violinplot")#
plot009_residuals <- plotly::add_trace(p = plot009_residuals,
type = "bar",
x = hist_data_studres$"mids",
y = hist_data_studres$"density",
name = "hist - studres")
plot009_residuals <- plotly::layout(p = plot009_residuals,
bargap = 0)
plot009_residuals <- plotly::layout(p = plot009_residuals,
title = "Plot 009 - Studres Distribution",
font = list(size = 20),
margin = list(t = 100))
plot009_residuals
# fin el 9
qq_info <- EnvStats::qqPlot(x = minibase_mod$studres, plot.it = F,
param.list = list(mean = 0,
sd = 1))
cuantiles_teoricos <- qq_info$x
cuantiles_observados <- qq_info$y
#library(plotly)
plot010_residuals <- plotly::plot_ly()
# Crear el gráfico QQ plot
plot010_residuals <-add_trace(p = plot010_residuals,
x = cuantiles_teoricos,
y = cuantiles_observados,
type = 'scatter', mode = 'markers',
marker = list(color = 'blue'),
name = "points")
# Agregar la línea de identidad
pendiente <- 1
intercepto <- 0
# Calcular las coordenadas de los extremos de la línea de identidad
x_extremos <- range(cuantiles_teoricos)
y_extremos <- pendiente * x_extremos + intercepto
# Agregar la recta de identidad
plot010_residuals <- add_trace(p = plot010_residuals,
x = x_extremos,
y = y_extremos,
type = 'scatter',
mode = 'lines',
line = list(color = 'red'),
name = "identity")
# Establecer etiquetas de los ejes
# plot007_residuals <- layout(p = plot007_residuals,
# xaxis = list(title = 'Expected quantiles'),
# yaxis = list(title = 'Observed quantiles'))
plot010_residuals <- plotly::layout(p = plot010_residuals,
title = "Plot 010 - QQ Plot - studres",
font = list(size = 20),
margin = list(t = 100))
# Mostrar el gráfico
plot010_residuals